Overview

This notebook analyzes gene expression in each cluster across multiple brain regions (SN, PUT, PFC, AMY) in the ASAP-PMDBS snRNA-seq dataset. The workflow focuses on: 1. Data preparation: loading gene-level counts from trusTEr output, organizing by cluster and region. 2. PD vs Control analysis: testing differential expression between PD and control within each cluster and region. 3. PD vs Control GSEA: testing enrichment of GO terms based on DEA (step 2) of each cluster and region. 4. Functional focus: visualizing interferon (IFN)-related signatures across clusters. 5. Visualization: generating mean-difference plots, and violin/boxplots.

Inputs

  • trusTEr output files with gene counts per cluster/region
  • Sample metadata: ASAP_samplesheet.xlsx

Outputs

  • Cluster-level gene count matrices (CSV)
  • Differential expression results (excel files)
  • Gene set enrichment analyses results (excel files)
  • Figures showing:
    • PD vs Control effects
    • IFN enrichment across clusters

Notes

  • Only expressed genes (row sums > 0) are tested.

Setup: Load libraries and helper functions

Load all required libraries and custom DEA functions

library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     shift
## The following object is masked from 'package:GenomicRanges':
## 
##     shift
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::between()      masks data.table::between()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks data.table::first(), S4Vectors::first()
## ✖ lubridate::hour()     masks data.table::hour()
## ✖ lubridate::isoweek()  masks data.table::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ dplyr::last()         masks data.table::last()
## ✖ lubridate::mday()     masks data.table::mday()
## ✖ lubridate::minute()   masks data.table::minute()
## ✖ lubridate::month()    masks data.table::month()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ lubridate::quarter()  masks data.table::quarter()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks data.table::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks IRanges::slice()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ lubridate::wday()     masks data.table::wday()
## ✖ lubridate::week()     masks data.table::week()
## ✖ lubridate::yday()     masks data.table::yday()
## ✖ lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
library(pheatmap)
library(clusterProfiler)
## 
## clusterProfiler v4.12.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## 
## The following object is masked from 'package:purrr':
## 
##     simplify
## 
## The following object is masked from 'package:IRanges':
## 
##     slice
## 
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(EnhancedVolcano)
## Loading required package: ggrepel
source("TE_DEA_functions.R")

Preprocess gene count files

Gather featureCount outputs, extract region and cluster IDs, merge them into cluster-specific count matrices and save them for convenience as .csv

path <- "~/inbox/ASAP/trusTEr_output_vs2"
files <- list.files(path, recursive = T, full.names = T)
files <- files[which(grepl("gene_counts/unique/", files))]
files <- files[which(!endsWith(files, "summary"))]
files <- data.frame(files = files)
files$region <- sapply(str_split(files$files, "/"), `[[`, 7)
files$cluster <- sapply(str_split(files$files, "_uniqueMap"), `[[`, 1)
files$cluster <- sapply(str_split(files$cluster, "/"), tail, 1)
files$cluster_num <- sapply(str_split(files$cluster, "_"), tail, 1)
files$region_cluster <- paste(files$region, files$cluster_num, sep="_")
files_split <- split(files, f = c(files$region_cluster))
regions <- unique(files$region)

for(cluster in as.character(0:7)){
  for(region in regions){
    region_cluster <- paste(region, cluster, sep="_")
    files_region_cluster <- files_split[[region_cluster]]
    for(f in 1:length(files_region_cluster$files)){
      file <- files_region_cluster$files[f]
      if(f == 1){
        gene_counts <- fread(file, data.table = F)
        colnames(gene_counts)[ncol(gene_counts)] <- str_remove_all(sapply(str_split(colnames(gene_counts)[ncol(gene_counts)], "/"), tail, 1), "_Aligned.sortedByCoord.out.bam")
      }else{
        tmp <- fread(file, data.table = F)
        colnames(tmp)[ncol(tmp)] <- str_remove_all(sapply(str_split(colnames(tmp)[ncol(tmp)], "/"), tail, 1), "_Aligned.sortedByCoord.out.bam")
        if(all(tmp$Geneid == gene_counts$Geneid)){
          gene_counts <- cbind(gene_counts, tmp[,ncol(tmp),drop=F])
        }else{
          gene_counts <- merge(gene_counts, tmp[,c("Geneid", colnames(tmp)[ncol(tmp)])], by="Geneid")
        }
      }
    }
    write.csv(gene_counts, paste("~/inbox/ASAP/trusTEr_output_vs2/", region, "/gene_counts_", paste(region, cluster, sep="_"), ".csv", sep=""), row.names = F)
  }
}

Characterize PD effect per cluster

Differential expression analyses for PD vs Control pseudobulks for each region and cluster pair. Save per-cluster results (one excel with region per sheet).

samplesheet <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/ASAP_samplesheet.xlsx")
gene_dds_list <- list()
gene_exp_list <- list()
gene_res_list <- list()
gene_res_list_df <- list()
regions <- c("SN", "PUT", "AMY", "PFC")
for(cluster in as.character(0:7)){
  gene_dds_list[[cluster]] <- list()
  gene_exp_list[[cluster]] <- list()
  gene_res_list[[cluster]] <- list()
  gene_res_list_df[[cluster]] <- list()
  for(region in regions){
    print(cluster)
    print(region)
    gene_counts <- list()
    gene_counts[[region]] <- fread(paste("~/inbox/ASAP/trusTEr_output_vs2/", region,"/gene_counts_", region,"_",cluster,".csv", sep=""), sep = ",", data.table = F)
    rownames(gene_counts[[region]]) <- gene_counts[[region]]$Geneid

    samplesheet_list <- split(samplesheet, f = samplesheet$Region)

    rownames(samplesheet_list[[region]]) <- samplesheet_list[[region]]$Sample
    samples_cluster <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
    samples_cluster <- samples_cluster[which(samples_cluster %in% colnames(gene_counts[[region]]))]
    rownames(samplesheet_list[[region]]) <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
    rownames(gene_counts[[region]]) <- gene_counts[[region]]$Geneid
    
    expressed_genes <- rownames(gene_counts[[region]])[which(rowSums(gene_counts[[region]][,samples_cluster]) > 0)]
    gene_dds_list[[cluster]][[region]] <- DESeqDataSetFromMatrix((gene_counts[[region]][expressed_genes,samples_cluster]+1), samplesheet_list[[region]][samples_cluster,], design =  ~ Dx)
    gene_dds_list[[cluster]][[region]]$Dx <- relevel(gene_dds_list[[cluster]][[region]]$Dx, "Ctl")
    gene_dds_list[[cluster]][[region]] <- DESeq(gene_dds_list[[cluster]][[region]])
    gene_exp_list[[cluster]][[region]] <- getAverage(gene_dds_list[[cluster]][[region]])
    gene_res_list[[cluster]][[region]] <- results(gene_dds_list[[cluster]][[region]], )
    gene_res_list_df[[cluster]][[region]] <- as.data.frame(gene_res_list[[cluster]][[region]])
    gene_res_list_df[[cluster]][[region]]$gene_id <- rownames(gene_res_list_df[[cluster]][[region]])
    print(names(gene_res_list_df[[cluster]]))
  }
  openxlsx::write.xlsx(gene_res_list_df[[cluster]], paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/gene_cluster_", cluster, "_PD_DEA_snRNAseq_res.xlsx", sep=""))
}
## [1] "0"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## [1] "SN"
## [1] "0"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 786 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "0"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 669 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "0"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 348 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"
## [1] "1"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 926 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "1"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 584 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "1"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 602 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "1"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 212 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"
## [1] "2"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 959 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "2"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1245 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "2"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1249 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "2"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 673 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"
## [1] "3"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 899 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "3"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 884 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "3"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 952 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "3"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 327 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"
## [1] "4"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 617 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "4"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 383 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "4"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 459 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "4"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 287 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"
## [1] "5"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1155 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "5"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 844 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "5"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1469 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "5"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 961 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"
## [1] "6"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 398 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "6"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 171 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "6"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 262 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "6"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 187 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"
## [1] "7"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 305 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "7"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 109 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT"
## [1] "7"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 76 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY"
## [1] "7"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 12 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"  "PUT" "AMY" "PFC"

Mean plots of PD vs Control

Visualize mean expression vs log2FC for each cluster-region

volcano_plots_list <- list()
mean_plots_list <- list()
for(cluster in as.character(0:7)){
  volcano_plots_list[[cluster]] <- list()
  mean_plots_list[[cluster]] <- list()
  for(region in regions){ 

    tmp <- EnhancedVolcano(gene_res_list[[cluster]][[region]], 
                                               drawConnectors = T, 
                                               lab = NA, 
                                               x = 'log2FoldChange',
                                               y = 'padj',
                                               pCutoff = 0.05,
                                               FCcutoff = 1)
    tmp$data$Sig <- ifelse(tmp$data$log2FoldChange > 1 & tmp$data$Sig == "FC_P", "Upregulated", 
                                               ifelse(tmp$data$log2FoldChange < -1 & tmp$data$Sig == "FC_P", "Downregulated", "Not significant"))
    tmp$data$Sig <- paste(tmp$data$Sig, table(tmp$data$Sig)[tmp$data$Sig], sep = ": ")
    
    not_sig_lab <- unique(tmp$data$Sig)[startsWith(unique(tmp$data$Sig), "Not significant")]
    up_sig_lab <- unique(tmp$data$Sig)[startsWith(unique(tmp$data$Sig), "Upregulated")]
    down_sig_lab <- unique(tmp$data$Sig)[startsWith(unique(tmp$data$Sig), "Downregulated")]
    
    if(length(unique(tmp$data$Sig)) == 1){
      point_colours <- c("grey")
    }
    if(length(unique(tmp$data$Sig)) == 2){
      if(any(str_detect(unique(tmp$data$Sig), "Upregulated"))){
        point_colours <- c("grey","tomato")
      }
    }
    if(length(unique(tmp$data$Sig)) == 2){
      if(any(str_detect(unique(tmp$data$Sig), "Downregulated"))){
        point_colours <- c("grey","darkturquoise")
      }
    }
    if(length(unique(tmp$data$Sig)) == 3){
      point_colours <- c("grey","darkturquoise", "tomato")
      names(point_colours) <- c(not_sig_lab, down_sig_lab, up_sig_lab)
    }
    point_sizes <- c(1,2,2)
    names(point_sizes) <- c(not_sig_lab, down_sig_lab, up_sig_lab)  
    title <- paste(region, cluster, sep=": cluster ")
    
    tmp <- tmp$data
    volcano_plots_list[[cluster]][[region]] <- ggplot(tmp, aes(x=log2FoldChange, y=-log10(padj), colour=Sig, size=Sig)) + geom_point(alpha=0.7) + 
      geom_vline(xintercept = 1, linetype="dotted") + geom_vline(xintercept = -1, linetype="dotted") + 
      geom_hline(yintercept = -log10(0.05), linetype="dotted") + labs(colour="") + 
      scale_color_manual(values = point_colours)  + 
      scale_size_manual(values = point_sizes, guide="none") +
      theme_bw() +
      theme(text = element_text(size=15), panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +
      ggtitle(title)
    
    effect <- unique(samplesheet_list[[region]][which(samplesheet_list[[region]]$Dx != "Ctl"),"Dx"])
    mean_plots_list[[cluster]][[region]] <- meanPlot_cus(exp = gene_exp_list[[cluster]][[region]]$Mean,
                                         test = gene_res_list[[cluster]][[region]], 
                                         col1 = ifelse(length(unique(tmp$Sig)) == 3, "tomato", 
                                                       ifelse(any(str_detect(unique(tmp$Sig), "Downregulated")), "grey", "darkturquoise")), 
                                         col2 = ifelse(length(unique(tmp$Sig)) == 3, "darkturquoise", 
                                                       ifelse(any(str_detect(unique(tmp$Sig), "Downregulated")), "darkturquoise", "grey")), 
                                         col3 = ifelse(length(unique(tmp$Sig)) == 3, "grey", 
                                                       ifelse(any(str_detect(unique(tmp$Sig), "Upregulated")), "tomato", "grey")),
                                         c1 = effect, c2="Ctl", p = 0.05, l=1, ttl = title, repel = F)
      
  }
}

mean_plots_list
## $`0`
## $`0`$SN
## Warning: Removed 6653 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`0`$PUT
## Warning: Removed 35564 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`0`$AMY
## Warning: Removed 36930 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`0`$PFC
## Warning: Removed 42538 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`1`
## $`1`$SN
## Warning: Removed 23823 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`1`$PUT
## Warning: Removed 31791 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`1`$AMY
## Warning: Removed 35127 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`1`$PFC
## Warning: Removed 38408 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`2`
## $`2`$SN
## Warning: Removed 33216 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`2`$PUT
## Warning: Removed 35867 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`2`$AMY
## Warning: Removed 35539 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`2`$PFC
## Warning: Removed 36141 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`3`
## $`3`$SN
## Warning: Removed 30840 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`3`$PUT
## Warning: Removed 30380 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`3`$AMY
## Warning: Removed 32243 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`3`$PFC
## Warning: Removed 30826 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`4`
## $`4`$SN
## Warning: Removed 27428 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`4`$PUT
## Warning: Removed 27085 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`4`$AMY
## Warning: Removed 27022 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`4`$PFC
## Warning: Removed 24930 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`5`
## $`5`$SN
## Warning: Removed 37824 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`5`$PUT
## Warning: Removed 35104 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`5`$AMY
## Warning: Removed 34018 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`5`$PFC
## Warning: Removed 34609 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`6`
## $`6`$SN
## Warning: Removed 19241 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`6`$PUT
## Warning: Removed 16485 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`6`$AMY
## Warning: Removed 18119 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`6`$PFC
## Warning: Removed 19972 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`7`
## $`7`$SN
## Warning: Removed 16272 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`7`$PUT
## Warning: Removed 11434 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`7`$AMY
## Warning: Removed 12453 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`7`$PFC
## Warning: Removed 9317 rows containing missing values or values outside the scale range
## (`geom_point()`).

Some helper functions for Gene Set Enrichment Analyses

set.seed(10)
gse_dotplot <- function(df){
  genelist <- df[which(!is.na(df$log2FoldChange)),c("log2FoldChange", "gene_id"), drop=F]
  genelist <- genelist[order(genelist$log2FoldChange, decreasing = T),]

  genelist_FC <- genelist$log2FoldChange
  names(genelist_FC) <- genelist$gene_id
  gse <- gseGO(geneList=genelist_FC,
               ont ="ALL",
               keyType = "SYMBOL",
               minGSSize = 3,
               maxGSSize = 800,
               seed = T,
               pvalueCutoff = 0.05,
               verbose = TRUE,
               OrgDb = org.Hs.eg.db,
               pAdjustMethod = "BH")
  return(gse)
}

Gene Set Enrichment Analyses (GSEA)

For some reason the seed is not taken in by clusterProfiler, so the results from here onwards will be using the original run I did on this code (same but ever so slightly different numbers, loaded here at the end of the chunk). The clean data provided will be the original run I did.

gse_list <- list()
gse_list_df <- list()
gse_plot_list <- list()
for(cluster in as.character(0:7)){
  gse_list[[cluster]] <- list()
  gse_list_df[[cluster]] <- list()
  gse_plot_list[[cluster]] <- list()
  for(region in regions){ 
    print(cluster)
    print(region)
    gse_list[[cluster]][[region]] <- gse_dotplot(df = gene_res_list_df[[cluster]][[region]])
    gse_list_df[[cluster]][[region]] <- gse_list[[cluster]][[region]]@result
    if(!is.null(gse_plot_list[[cluster]][[region]])){
    # gse_plot_list[[cluster]][[region]] <- gse_pretty_dotplot(gse = gse_list[[cluster]][[region]], topn = 30) + ggtitle(paste(region, cluster, sep=" cluster: "))
    }
  }
  if(length(gse_list_df[[cluster]]) > 0){
  openxlsx::write.xlsx(gse_list_df[[cluster]], paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/gse_cluster", cluster, "PD_GSEA_snRNAseq_res.xlsx", sep="_"))
  }
}
## [1] "0"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (90.46% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## no term enriched under specific pvalueCutoff...
## [1] "0"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (24.56% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "0"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (23.59% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 100 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "0"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (13.49% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (29.53% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (23.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 8 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (19.24% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.48% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.53% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.49% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 11 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (19.45% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.85% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.33% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (19.44% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.23% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.69% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (21.06% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (21.23% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 26 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (22.65% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (21.79% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "5"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (13.13% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "5"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (14.73% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "5"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (16.67% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "5"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.51% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "6"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (28.86% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "6"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (33.31% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "6"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "6"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (27.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 2 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## leading edge analysis...
## done...
## [1] "7"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (30.27% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "7"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (52.72% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## leading edge analysis...
## done...
## [1] "7"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (62.73% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 7 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "7"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (63.48% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 2 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
# save.image("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/src/r_scripts/gene_uniq_DEA.Rdata")
load("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/src/r_scripts/gene_uniq_DEA.Rdata")

This chunk is just to show you that the results of the gene DEA are indeed identical as before

mean_plots_list
## $`0`
## $`0`$SN
## Warning: Removed 6653 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`0`$PUT
## Warning: Removed 35564 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`0`$AMY
## Warning: Removed 36930 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`0`$PFC
## Warning: Removed 42538 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`1`
## $`1`$SN
## Warning: Removed 23823 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`1`$PUT
## Warning: Removed 31791 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`1`$AMY
## Warning: Removed 35127 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`1`$PFC
## Warning: Removed 38408 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`2`
## $`2`$SN
## Warning: Removed 33216 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`2`$PUT
## Warning: Removed 35867 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`2`$AMY
## Warning: Removed 35539 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`2`$PFC
## Warning: Removed 36141 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`3`
## $`3`$SN
## Warning: Removed 30840 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`3`$PUT
## Warning: Removed 30380 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`3`$AMY
## Warning: Removed 32243 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`3`$PFC
## Warning: Removed 30826 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`4`
## $`4`$SN
## Warning: Removed 27428 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`4`$PUT
## Warning: Removed 27085 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`4`$AMY
## Warning: Removed 27022 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`4`$PFC
## Warning: Removed 24930 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`5`
## $`5`$SN
## Warning: Removed 37824 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`5`$PUT
## Warning: Removed 35104 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`5`$AMY
## Warning: Removed 34018 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`5`$PFC
## Warning: Removed 34609 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`6`
## $`6`$SN
## Warning: Removed 19241 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`6`$PUT
## Warning: Removed 16485 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`6`$AMY
## Warning: Removed 18119 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`6`$PFC
## Warning: Removed 19972 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## 
## $`7`
## $`7`$SN
## Warning: Removed 16272 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`7`$PUT
## Warning: Removed 11434 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`7`$AMY
## Warning: Removed 12453 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $`7`$PFC
## Warning: Removed 9317 rows containing missing values or values outside the scale range
## (`geom_point()`).

Another helper function for plotting the Gene Set Enrichment Analyses results

gse_pretty_dotplot <- function(gse, topn = 10, terms = NA){
  gse <- gse@result
  # gse <- gse[which(gse$ONTOLOGY == "BP"),]
  gse$sign <- ifelse(gse$enrichmentScore > 0, "Activated", "Supressed")
  gse$num_genes <- sapply(str_split(gse$core_enrichment, "/"), length)
  gse$gene_ratio <- gse$num_genes / gse$setSize
  gse$Description <- factor(gse$Description, levels = gse[order(gse$gene_ratio), "Description"])
  
  if(!all(is.na(terms))){
    gse <- gse %>% 
      drop_na() %>% 
      filter(ID %in% terms)
  }else{
    gse <- gse[order(gse$NES, -log10(gse$p.adjust), decreasing = T),]
  }
  gse %>% 
      drop_na() %>% 
      ggplot(aes(x=Description, y=gene_ratio, size = num_genes, fill = p.adjust)) + geom_point(shape=21, colour="lightgrey") + facet_wrap(.~sign) + coord_flip() + theme_pubr(legend = "right", border = T) + labs(x="", fill="Padj", y = "Gene ratio", size = "Num genes") + scale_fill_gradientn(colors = c("firebrick1", "gray94")) + scale_size_continuous(range = c(2,8)) + theme(axis.text.y = element_text(size=10), strip.text.x = element_text(size = 10),axis.text.x = element_text(size=10),legend.text = element_text(size=10),legend.title = element_text(size=10)) 
}